#!/usr/bin/env python
# -*- coding: utf-8 -*-
# 
# Copyright (c) 2017 Baidu.com, Inc. All Rights Reserved
# 

"""
File: unit4.py
Author: zhangyang(zhangyang40@baidu.com)
Date: 2018/2/5 1:45 PM
"""
import warnings

warnings.filterwarnings(action='ignore', category=UserWarning, module='gensim')
import logging

logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)

from gensim import corpora, models, similarities

if __name__ == '__main__':
    # Demo script: rank the documents of the stored corpus by their
    # similarity to a short query, working in a 2-topic LSI space.

    # Load the serialized dictionary and the Matrix Market corpus from disk.
    id2word = corpora.Dictionary.load('data/deerwester.dict')
    bow_corpus = corpora.MmCorpus('data/deerwester.mm')
    print(bow_corpus)

    # Fit an LSI model with two topics over the loaded corpus.
    lsi_model = models.LsiModel(bow_corpus, id2word=id2word, num_topics=2)

    # Lowercase and whitespace-split the query, convert it to bag-of-words
    # with the same dictionary, then project it through the LSI model.
    query_text = "Human computer interaction"
    query_tokens = query_text.lower().split()
    query_bow = id2word.doc2bow(query_tokens)
    query_lsi = lsi_model[query_bow]
    print(query_lsi)

    # Build the similarity index over the LSI-transformed corpus, then write
    # it to disk and read it back so the persisted copy is the one queried.
    index_path = 'data/deerwester.index'
    sim_index = similarities.MatrixSimilarity(lsi_model[bow_corpus])
    sim_index.save(index_path)
    sim_index = similarities.MatrixSimilarity.load(index_path)

    # Query the index and show (document position, score) pairs in corpus order.
    scores = sim_index[query_lsi]
    print(list(enumerate(scores)))

    # Show the same pairs again, ordered by score from highest to lowest
    # (the sort key is the negated score).
    ranked = list(enumerate(scores))
    ranked.sort(key=lambda pair: -pair[1])
    print(ranked)
